########################################################
###           NSFG with MORGAN                      ###
###           MAXIMUM LIKELIHOOD CLASS              ###
###           02/26/2013                            ###
#######################################################

rm(list=ls(all=TRUE))

library(foreign)
library(MASS)
library(RColorBrewer)

# Load data
wd<-c("C:/Users/Felicia/Dropbox/My papers/sex composition in NSFG with Morgan/results/work")
setwd(wd)
data = read.dta("intent.dta")
ASR = data[which(data$survey==1988 | data$survey==1995),]
NSFG1982 = data[which(data$survey==1982),]
NSFG1988 = data[which(data$survey==1988),]   
NSFG1995 = data[which(data$survey==1995),]
NSFG2002 = data[which(data$survey==2002),]
NSFG2006 = data[which(data$survey==2006),]

# m1: full sample: without controls
  m1 = glm(intent ~ samesex+as.factor(survey)+samesex*as.factor(survey), data=ASR, family=binomial(link="logit"))
  m1.1988 = glm(intent ~ samesex, data=NSFG1988, family=binomial(link="logit"))
  m1.1995 = glm(intent ~ samesex, data=NSFG1995, family=binomial(link="logit"))
  m1.1982 = glm(intent ~ samesex, data=NSFG1982, family=binomial(link="logit"))
  m1.2002 = glm(intent ~ samesex, data=NSFG2002, family=binomial(link="logit"))
  m1.2006 = glm(intent ~ samesex, data=NSFG2006, family=binomial(link="logit"))
    coef.m1 = round(c(coef(m1.1982)[2],coef(m1.1988)[2],coef(m1.1995)[2],coef(m1.2002)[2],coef(m1.2006)[2]), digits=3)
    se.m1 = round(c(sqrt(diag(vcov(m1.1982)))[2],sqrt(diag(vcov(m1.1988)))[2],sqrt(diag(vcov(m1.1995)))[2],sqrt(diag(vcov(m1.2002)))[2],sqrt(diag(vcov(m1.2006)))[2]), digits=3)
    n.m1 = c(m1.1982$df.null+1, m1.1988$df.null+1, m1.1995$df.null+1, m1.2002$df.null+1, m1.2006$df.null+1)
    sig.m1 = round(c(summary(m1.1982)$coef[2,4],summary(m1.1988)$coef[2,4],summary(m1.1995)$coef[2,4],summary(m1.2002)$coef[2,4],summary(m1.2006)$coef[2,4]), digits=3)
     

# m2: full sample + demographic controls
  m2 = glm(intent ~ samesex+as.factor(survey)+samesex*as.factor(survey)+age+marital+educat+race, data=ASR, family=binomial(link="logit"))
  m2.1988 = glm(intent ~ samesex+age+marital+educat+race, data=NSFG1988, family=binomial(link="logit"))
  m2.1995 = glm(intent ~ samesex+age+marital+educat+race, data=NSFG1995, family=binomial(link="logit"))
  m2.1982 = glm(intent ~ samesex+age+marital+educat+race, data=NSFG1982, family=binomial(link="logit"))
  m2.2002 = glm(intent ~ samesex+age+marital+educat+race, data=NSFG2002, family=binomial(link="logit"))
  m2.2006 = glm(intent ~ samesex+age+marital+educat+race, data=NSFG2006, family=binomial(link="logit"))
    coef.m2 = round(c(coef(m2.1982)[2],coef(m2.1988)[2],coef(m2.1995)[2],coef(m2.2002)[2],coef(m2.2006)[2]), digits=3)
    se.m2 = round(c(sqrt(diag(vcov(m2.1982)))[2],sqrt(diag(vcov(m2.1988)))[2],sqrt(diag(vcov(m2.1995)))[2],sqrt(diag(vcov(m2.2002)))[2],sqrt(diag(vcov(m2.2006)))[2]), digits=3)
    n.m2 = c(m2.1982$df.null+1, m2.1988$df.null+1, m2.1995$df.null+1, m2.2002$df.null+1, m2.2006$df.null+1)
    sig.m2 = round(c(summary(m2.1982)$coef[2,4],summary(m2.1988)$coef[2,4],summary(m2.1995)$coef[2,4],summary(m2.2002)$coef[2,4],summary(m2.2006)$coef[2,4]), digits=3)
      
# m3: currently married sample:  + demographic controls
  married.ASR = ASR[which(ASR$marital=="currently married"),]
  married.1988 = NSFG1988[which(NSFG1988$marital=="currently married"),]
  married.1995 = NSFG1995[which(NSFG1995$marital=="currently married"),]
  married.1982 = NSFG1982[which(NSFG1982$marital=="currently married"),]
  married.2002 = NSFG2002[which(NSFG2002$marital=="currently married"),]
  married.2006 = NSFG2006[which(NSFG2006$marital=="currently married"),]

  m3 = glm(intent ~ samesex+as.factor(survey)+samesex*as.factor(survey)+age+educat+race, data=married.ASR, family=binomial(link="logit"))
  m3.1988 = glm(intent ~ samesex+age+educat+race, data=married.1988, family=binomial(link="logit"))
  m3.1995 = glm(intent ~ samesex+age+educat+race, data=married.1995, family=binomial(link="logit"))
  m3.1982 = glm(intent ~ samesex+age+educat+race, data=married.1982, family=binomial(link="logit"))
  m3.2002 = glm(intent ~ samesex+age+educat+race, data=married.2002, family=binomial(link="logit"))
  m3.2006 = glm(intent ~ samesex+age+educat+race, data=married.2006, family=binomial(link="logit"))
    coef.m3 = round(c(coef(m3.1982)[2],coef(m3.1988)[2],coef(m3.1995)[2],coef(m3.2002)[2],coef(m3.2006)[2]), digits=3)
    se.m3 = round(c(sqrt(diag(vcov(m3.1982)))[2],sqrt(diag(vcov(m3.1988)))[2],sqrt(diag(vcov(m3.1995)))[2],sqrt(diag(vcov(m3.2002)))[2],sqrt(diag(vcov(m3.2006)))[2]), digits=3)
    n.m3 = c(m3.1982$df.null+1, m3.1988$df.null+1, m3.1995$df.null+1, m3.2002$df.null+1, m3.2006$df.null+1)
    sig.m3 = round(c(summary(m3.1982)$coef[2,4],summary(m3.1988)$coef[2,4],summary(m3.1995)$coef[2,4],summary(m3.2002)$coef[2,4],summary(m3.2006)$coef[2,4]), digits=3)


# m4: full sample + demograhic and socioeconomic controls
  m4 = glm(intent ~ samesex+as.factor(survey)+samesex*as.factor(survey)+age+marital+educat+race+income+catholic+fulltime+as.factor(educat_m)+rural+abortion, data=ASR, family=binomial(link="logit"))
  m4.1988 = glm(intent ~ samesex+age+marital+educat+race+income+catholic+fulltime+as.factor(educat_m)+smsa+abortion, data=NSFG1988, family=binomial(link="logit"))
  m4.1995 = glm(intent ~ samesex+age+marital+educat+race+income+catholic+fulltime+as.factor(educat_m)+smsa+abortion, data=NSFG1995, family=binomial(link="logit"))
  m4.1982 = glm(intent ~ samesex+age+marital+educat+race+income+catholic+fulltime+as.factor(educat_m)+smsa+abortion, data=NSFG1982, family=binomial(link="logit"))
  m4.2002 = glm(intent ~ samesex+age+marital+educat+race+income+catholic+fulltime+as.factor(educat_m)+smsa+abortion, data=NSFG2002, family=binomial(link="logit"))
  m4.2006 = glm(intent ~ samesex+age+marital+educat+race+income+catholic+fulltime+as.factor(educat_m)+smsa+abortion, data=NSFG2006, family=binomial(link="logit"))
    coef.m4 = round(c(coef(m4.1982)[2],coef(m4.1988)[2],coef(m4.1995)[2],coef(m4.2002)[2],coef(m4.2006)[2]), digits=3)
    se.m4 = round(c(sqrt(diag(vcov(m4.1982)))[2],sqrt(diag(vcov(m4.1988)))[2],sqrt(diag(vcov(m4.1995)))[2],sqrt(diag(vcov(m4.2002)))[2],sqrt(diag(vcov(m4.2006)))[2]),digits=3)
    n.m4 = c(m4.1982$df.null+1, m4.1988$df.null+1, m4.1995$df.null+1, m4.2002$df.null+1, m4.2006$df.null+1)
    sig.m4 = round(c(summary(m4.1982)$coef[2,4],summary(m4.1988)$coef[2,4],summary(m4.1995)$coef[2,4],summary(m4.2002)$coef[2,4],summary(m4.2006)$coef[2,4]), digits=3)

##build coef model
  year = c("1982","1988","1995","2002","2006")
  model = data.frame(year, 
                     coef.m1, se.m1, sig.m1, n.m1, 
                     coef.m2, se.m2, sig.m2, n.m2, 
                     coef.m3, se.m3, sig.m3, n.m3, 
                     coef.m4, se.m4, sig.m4, n.m4)

#####################################################################################################
##Estimation with uncertainty
    ## scenario: married, college-educated, white, full-time, catholic, living in MSA, never had abortion


#1982 NSFG
  samesex.1982 = c(1,1,mean(NSFG1982$age),0,0,0,0,1,0,0,0,mean(NSFG1982$income, na.rm=TRUE),1,1,0,1,0,0,0)
  mixsex.1982 = c(1,0,mean(NSFG1982$age),0,0,0,0,1,0,0,0,mean(NSFG1982$income, na.rm=TRUE),1,1,0,1,0,0,0)
  
  draw.1982 = mvrnorm(n=1000, m4.1982$coefficients, vcov(m4.1982))
  pred.samesex.1982 = exp(draw.1982 %*% samesex.1982)/(1+exp(draw.1982 %*% samesex.1982))
  pred.mixsex.1982 = exp(draw.1982 %*% mixsex.1982)/(1+exp(draw.1982 %*% mixsex.1982))

  plot(density(pred.samesex.1982), xlim=c(0,0.6), ylim=c(0,12), xlab="predicted probabilities", col="black", lty=1, main="NSFG1982")
    lines(density(pred.mixsex.1982), col="red", lty=2)
    legend("topright",c("mixed-sex children","same-sex children"),col=c("red","black"), lty=c(2,1),cex=0.8)

# 1988 NSFG
  samesex.1988 = c(1,1,mean(NSFG1988$age),0,0,0,0,1,0,0,0,mean(NSFG1988$income, na.rm=TRUE),1,1,0,1,0,0,0)
  mixsex.1988 = c(1,0,mean(NSFG1988$age),0,0,0,0,1,0,0,0,mean(NSFG1988$income, na.rm=TRUE),1,1,0,1,0,0,0)

  draw.1988 = mvrnorm(n=1000, m4.1988$coefficients, vcov(m4.1988))
  pred.samesex.1988 = exp(draw.1988 %*%samesex.1988)/(1+exp(draw.1988 %*% samesex.1988))
  pred.mixsex.1988 = exp(draw.1988 %*% mixsex.1988)/(1+exp(draw.1988 %*% mixsex.1988))

  plot(density(pred.samesex.1988), xlim=c(0,0.6), ylim=c(0,12), xlab="predicted probabilities", col="black", lty=1, main="NSFG1988")
    lines(density(pred.mixsex.1988), col="red", lty=2)
    legend("topright",c("mixed-sex children","same-sex children"),col=c("red","black"), lty=c(2,1),cex=0.8)

#NSFG1995
  samesex.1995 = c(1,1,mean(NSFG1995$age),0,0,0,0,1,0,0,0,mean(NSFG1995$income, na.rm=TRUE),1,1,0,1,0,0,0)
  mixsex.1995 = c(1,0,mean(NSFG1995$age),0,0,0,0,1,0,0,0,mean(NSFG1995$income, na.rm=TRUE),1,1,0,1,0,0,0)

  draw.1995 = mvrnorm(n=1000, m4.1995$coefficients, vcov(m4.1995))
  pred.samesex.1995 = exp(draw.1995 %*% samesex.1995)/(1+exp(draw.1995 %*% samesex.1995))
  pred.mixsex.1995 = exp(draw.1995 %*% mixsex.1995)/(1+exp(draw.1995 %*% mixsex.1995))

  plot(density(pred.samesex.1995), xlim=c(0,0.6), ylim=c(0,12), xlab="predicted probabilities", col="black", lty=1, main="NSFG1995")
    lines(density(pred.mixsex.1995), col="red", lty=2)
    legend("topright",c("mixed-sex children","same-sex children"),col=c("red","black"), lty=c(2,1),cex=0.8)

#NSFG2002
samesex.2002 = c(1,1,mean(NSFG2002$age),0,0,0,0,1,0,0,0,mean(NSFG2002$income, na.rm=TRUE),1,1,0,1,0,0,0)
mixsex.2002 = c(1,0,mean(NSFG2002$age),0,0,0,0,1,0,0,0,mean(NSFG2002$income, na.rm=TRUE),1,1,0,1,0,0,0)

draw.2002 = mvrnorm(n=1000, m4.2002$coefficients, vcov(m4.2002))
pred.samesex.2002 = exp(draw.2002 %*% samesex.2002)/(1+exp(draw.2002 %*% samesex.2002))
pred.mixsex.2002 = exp(draw.2002 %*% mixsex.2002)/(1+exp(draw.2002 %*% mixsex.2002))

plot(density(pred.samesex.2002), xlim=c(0,0.6), ylim=c(0,12), xlab="predicted probabilities", col="black", lty=1, main="NSFG2002")
lines(density(pred.mixsex.2002), col="red", lty=2)
    legend("topright",c("mixed-sex children","same-sex children"),col=c("red","black"), lty=c(2,1),cex=0.8)

#NSFG2006
samesex.2006 = c(1,1,mean(NSFG2006$age),0,0,0,0,1,0,0,0,mean(NSFG2006$income, na.rm=TRUE),1,1,0,1,0,0,0)
mixsex.2006 = c(1,0,mean(NSFG2006$age),0,0,0,0,1,0,0,0,mean(NSFG2006$income, na.rm=TRUE),1,1,0,1,0,0,0)

draw.2006 = mvrnorm(n=1000, m4.2006$coefficients, vcov(m4.2006))
pred.samesex.2006 = exp(draw.2006 %*% samesex.2006)/(1+exp(draw.2006 %*% samesex.2006))
pred.mixsex.2006 = exp(draw.2006 %*% mixsex.2006)/(1+exp(draw.2006 %*% mixsex.2006))

plot(density(pred.samesex.2006), xlim=c(0,0.6), ylim=c(0,12), xlab="predicted probabilities", col="black", lty=1, main="NSFG2006")
lines(density(pred.mixsex.2006), col="red", lty=2)
    legend("topright",c("mixed-sex children","same-sex children"),col=c("red","black"), lty=c(2,1),cex=0.8)
